************************************************************************
* Date: October 26, 2017
* David Simon
* Code for matching Schools to Road
* JAH: Updated to find the 5 nearest ROADWAYs


/*
1. For each school, calculate shortest distance between each school and  each 
road.    

2. Assign bearing  between road and school
.
3. merge on Mattis data matched to school with wind direction.  

*/
************************************************************************



set more off
cap log close


	
**************Globals********************

	global home 

	global winddata 
	global roaddata 
	global schooldata 
	global output 
	global samples 


log using "$output\allsclmjrd_mjrrds.log", replace


**********start by Cleaning up the road data**************

use "$home\Traffic\Traffic CDS\aadt_latlong.dta", replace

rename (traflat traflong) (latitude longitude)
	gen aadtroad=1
	label var aadtroad "an aadt road"
	
* limit this to roads with an aadt in the top 75 percentile

sum AADT, detail  /*this tells us that the 75 percentile of cars is 18000 */

gen id=_n

**getting road names from other sources: 


preserve


	use "$home\roadsetup\rawdata\us_roads\usroads.dta", clear
	keep ROADWAY ROUTE
	tempfile roadnames
	save "`roadnames'", replace

	use "$home\roadsetup\rawdata\interstates\interstates.dta", clear
	keep ROADWAY ROUTE
	append using `roadnames'
	save "`roadnames'", replace	
	
	gen order=.
		replace order=1 if strpos(ROUTE, "I")
		replace order=2 if strpos(ROUTE, "US")
		replace order=3 if strpos(ROUTE, "SR")

	sort ROADWAY order
	
	duplicates drop ROADWAY, force

	drop order

	save "`roadnames'", replace	
	
restore


merge m:1 ROADWAY using `roadnames'
	drop if _merge==2
	drop _merge

sort id

save "$roaddata\aadt\mjrrdsjah.dta", replace



***************Create line segments***********************************	
	***Getting prior segment piece's lat/long, if it's from the same roadway	- JAH
foreach var of varlist latitude longitude {
	gen `var'_prior=`var'[_n-1]
	replace `var'_prior=. if `var'==0
	replace `var'_prior=. if `var'_prior==0
	}


***getting segments (will be one less than the number of end points (e.g., if you have 3 points, you have 2 line segments) - JAH	
	geodist latitude longitude latitude_prior longitude_prior, generate(segmentdistance) miles
	label var segmentdistance "Road segment distance in miles"
	sum segmentdistance, d
	*hist segmentdistance if segmentdistance>0.1, freq
	
***AND NOW: DIVIDING UP SEGMENTS >0.1: For ordering purposes:
	gen n=_n

	gen segexp=floor(segmentdistance*100)+1  /**If a segment is longer than 0.01 mi,
	round up to the nearest 10th, multiply by 10, and divide it into that number of segments
	Change X in segmentdistance*X to make the minimum distance 1/X miles */
		
	expand segexp
	sort n
	*get a count by segment:
	by n: generate segord=_n

	**Replacing:
	foreach var of varlist latitude longitude {
		gen `var'_original=`var'
		*take the previous latitude, add on the extra fraction of the distance to get to the next value in the data 
		*(or each segment, so multiple by order 1-2-3...)
		gen dist`var' = (`var'-`var'_prior)/segexp if segexp>1
		replace `var'=`var'_prior+segord*(`var'-`var'_prior)/segexp if segexp>1
		*useful for checking: 
		order `var' `var'_original `var'_prior segexp dist`var' segord segmentdistance n
												}													
	drop id distlatitude distlongitude
	gen id=_n
	sum id
	order latitude longitude segmentdistance AADT ROADWAY BEGIN_POST END_POST Shape_Leng id


	

	**Chceck 1:  Verify no more long segments:
	***Getting prior segment piece's lat/long, if it's from the same roadway	- JAH
	*foreach var of varlist latitude longitude {
	*	gen `var'_prior_2=`var'[_n-1]
	*	replace `var'_prior_2=. if `var'==0
	*	replace `var'_prior_2=. if `var'_prior==0
	*	}
	***getting segments (will be one less than the number of end points (e.g., if you have 3 points, you have 2 line segments) - JAH	
	*geodist latitude longitude latitude_prior_2 longitude_prior_2, generate(segmentdistance_2) miles
	*	label var segmentdistance "Road segment distance in miles"
	*	sum segmentdistance_2, d
	*	hist segmentdistance_2 if segmentdistance_2>0.1, freq
	***saving this new data as a temporary file:
	
	destring ROADWAY, replace
	
	tempfile majorrd
	save "`majorrd'"	

********************NOW DELETE THE non-interstates/non-US highways***************	
keep if strpos(ROUTE, "I")|strpos(ROUTE, "US") // JAH: Move this below, after we've divided up the segments
	
***Map them:	
twoway 	(scatter latitude longitude if strpos(ROUTE, "US"), msize(vtiny) mcolor(gray )) ///
		(scatter latitude longitude if strpos(ROUTE, "I"),  msize(vtiny) mcolor(black)), ///
		graphregion(color(white)) bgcolor(white) ///
		xla(, labcolor(white) tlength(0)) xtitle("") ///
		yla(, labcolor(white) tlength(0)) ytitle("") ///
		legend(order(2 "Interstates" 1 "US Highways")) ///
		ysize(6)
		
		graph export "$home\School wind pollution\output\whole sample results\Map_of_Majrrds.png", as(png) replace

		
********************now create calculate nearest road to school***************

clear
tempfile majorrd_matches
gen ncessch=.
save "`majorrd_matches'"

***Identify the number of schools that we'll loop through:
use "$schooldata\NCES_2010.dta", replace
	tempvar all
	gen `all'=_N
	disp `all'

**2. To get the first-fifth nearest road, by school
*forv schoollist=1/5 {  //Ljust do 5 schools for trouble-shooting
forv schoollist=1/4289 {  //Loop thru for every school individually - COULDN"T FIGURE OUT HOW TO GET A TEMPVAR TO SAY 4289, in case something changes with the file?
	forv i=1/5 {
		if `i'==1 {			// First time around, make a temporary file of roads to use for matching; this will be reduced down later
			use "`majorrd'", clear
			tempfile majorrd_`schoollist'
			qui: save "`majorrd_`schoollist''"
			display "School `schoollist' of 4289"
			}	
		use "$schooldata\NCES_2010.dta", replace // open up NCES list of schools
		
		qui: keep if _n==`schoollist' // TO LIMIT TO ONE SCHOOL at a time
		qui: geonear ncessch latcod loncod using "`majorrd_`schoollist''", neighbors(id latitude longitude) miles //this is where mi_to_nid is made

		*merge road data - must set to local drive
		rename nid id

		qui: merge m:1 id using `majorrd'
		qui: tab _merge


		* DES 6/16/17: there are more road points than monitors so not every pollution monitor
		* data point has a match in the lac_STROUTEs data, should be fine to drop unmatched;

		qui: drop if _merge==2
		drop _merge

		rename (latitude longitude) (road_lat road_lon)
		
		rename id road_id`i'
		foreach var of varlist road_lat road_lon mi_to_nid AADT ROADWAY ROUTE {
			rename `var' `var'`i'
			}

		keep road_lat`i' road_lon`i' latcod loncod ncessch schnam mi_to_nid`i' ROADWAY`i' ROUTE`i' AADT`i'
		
		local roadtodrop=ROADWAY`i'

		append using "`majorrd_matches'" // append to the list of school-to-road matches
		qui: save "`majorrd_matches'", replace
		
		***NOW, delete the ROADWAY we just matched on and loop back over the next closest one:
		use "`majorrd_`schoollist''", clear
		qui: drop if ROADWAY==`roadtodrop'
		qui: save "`majorrd_`schoollist''", replace
		}
	}

use "`majorrd_matches'", clear

foreach var of varlist ROUTE* {
	forv i=1/4 {
		replace `var'=`var'[_n-`i'] if ncessch==ncessch[_n-`i']&`var'[_n-`i']!=""
		replace `var'=`var'[_n+`i'] if ncessch==ncessch[_n+`i']&`var'[_n+`i']!=""
	}
 }

collapse  latcod loncod mi_to_nid5 road_lat5 road_lon5 AADT5 ROADWAY5 mi_to_nid4 road_lat4 road_lon4 AADT4 ROADWAY4 mi_to_nid3 road_lat3 road_lon3 AADT3 ROADWAY3 mi_to_nid2 road_lat2 road_lon2 AADT2 ROADWAY2 mi_to_nid1 road_lat1 road_lon1 AADT1 ROADWAY1, by( ncessch schnam ROUTE*)
	
**************Now try to calculate initial bearing of school to road  *******************
* check this against this formula:  http://www.movable-type.co.uk/scripts/latlong.html

	g lat1=latcod*_pi/180
	g lon1=loncod*_pi/180

forv i=1/5 {
	g lat2`i'=road_lat`i'*_pi/180
	g lon2`i'=road_lon`i'*_pi/180
//=ATAN2(COS(lat1)*SIN(lat2)-SIN(lat1)*COS(lat2)*COS(lon2-lon1), SIN(lon2-lon1)*COS(lat2)) - from http://www.movable-type.co.uk/scripts/latlong.html
	g theta`i'=atan2(sin(lon2`i'-lon1)*cos(lat2`i'), cos(lat1)*sin(lat2`i')-sin(lat1)*cos(lat2`i')*cos(lon2`i'-lon1)) 
	gen degrees`i' = (180*theta`i')/_pi
	g angle_degrees`i'=mod((degrees`i'+360), 360)
}

*reality check: this gives us a bearing between 1 and 360
	sum angle_degrees*



	list angle_degrees* latcod* loncod* road_lat* road_lon* in 1/5
	
	
save "$samples\schooltomajorrds_mjrrds_5closest.dta", replace
